home *** CD-ROM | disk | FTP | other *** search
- unit Index;
- {$I-}
- interface
- const
- website = 'http://www.drbob42.com';
-
- const
- IdentSet = ['A'..'Z','a'..'z','0'..'9','-','+'];
- StartSet = ['A'..'Z','a'..'z'];
-
- const
- MaxPage = 255;
-
- type
- TNumPage = 0..MaxPage; { max number of webpages in site }
- TURLPage = ShortString { assuming URL <= 255 characters };
-
- var
- WebPages: TNumPage = 0;
- WebPage: Array[TNumPage] of TURLPage;
-
- const
- MaxKeyword = 31;
-
- type
- TKeyword = String[MaxKeyword];
- TPageSet = Set of TNumPage;
-
- type
- TNode = record
- Keyword: TKeyword; { 32 bytes }
- URLs: TPageSet; { 32 bytes }
- end {TNode};
-
- TTree = class
- Node: TNode;
- constructor Create(const Keyword: TKeyword; WebPage: TNumPage);
- destructor Destroy; override;
- private
- Prev,Next: TTree;
- end {TTree};
-
- var
- Keywords: Integer = 0;
- root: TTree = nil;
-
- type
- TIndexFile = File of TNode;
-
- implementation
- uses
- SysUtils;
-
- constructor TTree.Create(const Keyword: TKeyword; WebPage: TNumPage);
- begin
- inherited Create;
- Inc(Keywords); // keep track of number of keywords
- Prev := nil;
- Next := nil;
- Node.Keyword := Keyword;
- Node.URLs := [WebPage]
- end {Create};
-
- destructor TTree.Destroy;
- begin
- if Prev <> nil then Prev.Free;
- if Next <> nil then Next.Free;
- inherited Destroy
- end {Destroy};
-
- procedure AddKeyword(const Keyword: TKeyword; WebPage: TNumPage);
- var
- tmp: TTree;
- begin
- if root = nil then
- root := TTree.Create(Keyword,WebPage)
- else { search }
- begin
- tmp := root;
- repeat
- if tmp.Node.Keyword > Keyword then
- begin
- if tmp.Prev = nil then
- tmp.Prev := TTree.Create(Keyword,WebPage);
- tmp := tmp.Prev
- end
- else
- if tmp.Node.Keyword < Keyword then
- begin
- if tmp.Next = nil then
- tmp.Next := TTree.Create(Keyword,WebPage);
- tmp := tmp.Next
- end
- until tmp.Node.Keyword = Keyword;
- tmp.Node.URLs := tmp.Node.URLs + [WebPage]
- end
- end {AddKeyword};
-
- procedure ScanPage(const FileName: ShortString; WebPage: TNumPage);
- var
- f: Text;
- NotInTag: Boolean;
- Keyword: ShortString;
- Len: Byte absolute Keyword;
- begin
- assign(f,FileName);
- reset(f);
- if IOResult = 0 then
- begin
- writeln('<LI><B>',FileName,'</B>');
- NotInTag := True;
- while not eof(f) do
- begin
- Len := 0;
- while not eoln(f) do
- begin
- Inc(Len);
- read(f,Keyword[Len]);
- if not (Keyword[Len] in IdentSet) then
- begin
- Dec(Len);
- if (Len > 2) and NotInTag then
- AddKeyword(LowerCase(Keyword),WebPage);
- if Keyword[Len+1] = '>' then NotInTag := True
- else
- if Keyword[Len+1] = '<' then NotInTag := False;
- Len := 0
- end
- else
- if (Len = 1) then { start with letter ?? }
- if not (Keyword[1] in StartSet) then Len := 0
- end;
- if (Len > 2) and NotInTag then
- AddKeyword(LowerCase(Keyword),WebPage);
- readln(f)
- end;
- close(f)
- end
- else
- writeln('<LI>',FileName); { failed to open }
- end {ScanPage};
-
- procedure ScanPages(const Path: ShortString);
- var
- SRec: TSearchRec;
- begin
- if FindFirst('*.*', faDirectory, SRec) = 0 then
- repeat
- if (SRec.Attr AND faDirectory) = faDirectory then
- begin
- if (SRec.Name[1] <> '.') then { skip '.' and '..' }
- if Pos('_vti',SRec.Name) = 0 then { _vti_cnf etc. }
- begin
- ChDir(SRec.Name);
- if IOResult = 0 then
- begin
- writeln('<LI><I>',SRec.Name,'</I>');
- writeln('<UL>');
- ScanPages(Path+'/'+SRec.Name); { recursive!! }
- writeln('</UL>');
- ChDir('..')
- end
- else
- writeln('<LI><I>',SRec.Name,'</I> - locked')
- end
- end
- else { file }
- if (Pos('.HTM',UpperCase(SRec.Name)) > 0) or
- (Pos('.ASP',UpperCase(SRec.Name)) > 0) then
- begin
- WebPage[WebPages] := Path + '/' + SRec.Name;
- ScanPage(SRec.Name,WebPages);
- Inc(WebPages)
- end
- until FindNext(SRec) <> 0;
- FindClose(SRec)
- end {ScanPages};
-
- procedure WriteTree(var IndexFile: TIndexFile; root: TTree);
- begin
- if root.Prev <> nil then WriteTree(IndexFile,root.Prev);
- write(IndexFile,root.Node);
- if root.Next <> nil then WriteTree(IndexFile,root.Next);
- end {WriteTree};
-
- var
- i: Integer;
- PageFile: Text;
- IndexFile: TIndexFile;
-
- initialization
- ChDir('..');
- if IOResult <> 0 then { skip };
- writeln('content-type: text/html');
- writeln;
- writeln('<HTML>');
- writeln('<BODY BACKGROUND="/gif/back.gif">');
- writeln('<H2>IndexBob</H2>');
- writeln('Creating index for: ',website);
- writeln('<P>');
- writeln('<UL>');
- ScanPages(website);
- writeln('</UL>');
- ChDir('cgi-bin');
- if IOResult <> 0 then { skip };
- assign(PageFile,'pages.bob');
- try
- rewrite(PageFile);
- for i:=0 to WebPages-1 do
- writeln(PageFile,WebPage[i]);
- finally
- close(PageFile)
- end;
- assign(IndexFile,'index.bob');
- if root <> nil then
- try
- rewrite(IndexFile);
- WriteTree(IndexFile,root)
- finally
- close(IndexFile)
- end;
- writeln('<HR>');
- writeln('<FONT SIZE=1>');
- writeln('Webpages: ',WebPages);
- writeln('<BR>Keywords: ',Keywords);
- writeln('</FONT>');
- writeln('<HR>');
- writeln('</BODY>');
- writeln('</HTML>')
- finalization
- root.Free
- end.
-